Asian American Quality of Life: Analysis

Author

Miguel Fudolig, Luke Cho, Lawrence Kim, Boya Liu

library(tidyverse)
library(ggplot2)
library(lavaan)
library(car)

Data set

This data set is from the 2015 Asian American Quality of Life survey. Participants are from Austin, Texas.

Input data set

# Load the survey responses and turn every character column into a factor.
# On import readr renames the two duplicated `Other` columns to
# `Other...17` and `Other...89`.
qol <- read_csv("AAQoL.csv") |>
  mutate(across(where(is.character), as.factor))
New names:
Rows: 2609 Columns: 231
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(190): Gender, Ethnicity, Marital Status, No One, Spouse, Children, Gran... dbl
(41): Survey ID, Age, Education Completed, Household Size, Grandparent,...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `Other` -> `Other...17`
• `Other` -> `Other...89`
# Render the full data set as an interactive table for browsing.
DT::datatable(qol)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html

There are 2,609 responses, some with missing data.

Summary statistics

Gender

# Count and percentage of respondents by gender, largest group first.
# Missing values are kept as their own row.
qol |>
  count(Gender, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 3 × 3
  Gender     n   pct
  <fct>  <int> <dbl>
1 Female  1425 54.6 
2 Male    1157 44.3 
3 <NA>      27  1.03

Age

# Mean, standard deviation, and range of age among respondents who
# reported an age.
qol |>
  drop_na(Age) |>
  summarize(
    age = mean(Age),
    SD = sd(Age),
    min = min(Age),
    max = max(Age)
  )
# A tibble: 1 × 4
    age    SD   min   max
  <dbl> <dbl> <dbl> <dbl>
1  42.9  17.1    18    98

Ethnicity

# Count and percentage of respondents by ethnicity, largest group first.
qol |>
  count(Ethnicity, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 7 × 3
  Ethnicity        n     pct
  <fct>        <int>   <dbl>
1 Chinese        639 24.5   
2 Asian Indian   574 22.0   
3 Vietnamese     514 19.7   
4 Korean         471 18.1   
5 Filipino       265 10.2   
6 Other          144  5.52  
7 <NA>             2  0.0767

Marital Status

# Count and percentage of respondents by marital status, largest group first.
qol |>
  count(`Marital Status`, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 5 × 3
  `Marital Status`          n    pct
  <fct>                 <int>  <dbl>
1 Married                1725 66.1  
2 Single                  726 27.8  
3 Living with a partner   110  4.22 
4 Other                    30  1.15 
5 <NA>                     18  0.690

Living Alone

# Recode the `No One` household indicator into a two-level factor
# (0 -> "With Others", anything else -> "Alone"), then tabulate it.
qol |>
  mutate(
    alone = factor(
      if_else(`No One` == 0, "With Others", "Alone"),
      levels = c("Alone", "With Others")
    )
  ) |>
  count(alone, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 3 × 3
  alone           n    pct
  <fct>       <int>  <dbl>
1 With Others  2392 91.7  
2 Alone         212  8.13 
3 <NA>            5  0.192

Religion

# Count and percentage of respondents by religion, largest group first.
qol |>
  count(Religion, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 8 × 3
  Religion       n    pct
  <fct>      <int>  <dbl>
1 Protestant   645 24.7  
2 None         506 19.4  
3 Catholic     492 18.9  
4 Hindu        479 18.4  
5 Buddhist     350 13.4  
6 Muslim        68  2.61 
7 Other         47  1.80 
8 <NA>          22  0.843

Employment

# Label the full-time employment indicator (0 -> "No", otherwise "Yes")
# and tabulate it, largest group first.
qol |>
  mutate(
    `Full Time Employment` = ifelse(`Full Time Employment` == 0, "No", "Yes")
  ) |>
  count(`Full Time Employment`, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 3 × 3
  `Full Time Employment`     n    pct
  <chr>                  <int>  <dbl>
1 No                      1458 55.9  
2 Yes                     1144 43.8  
3 <NA>                       7  0.268

US Born

# Count and percentage of respondents by US-born status, largest group first.
qol |>
  count(`US Born`, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 3 × 3
  `US Born`     n    pct
  <fct>     <int>  <dbl>
1 No         2353 90.2  
2 Yes         239  9.16 
3 <NA>         17  0.652

Duration

# Mean and standard deviation of duration of residency among respondents
# who reported it.
qol |>
  drop_na(`Duration of Residency`) |>
  summarize(
    mean = mean(`Duration of Residency`),
    SD = sd(`Duration of Residency`)
  )
# A tibble: 1 × 2
   mean    SD
  <dbl> <dbl>
1  15.6  12.7

English Speaking and Difficulty

Primary Speakers

1 = Primary Speaker, 0 = non-primary speaker

# Count and percentage of respondents by the `Primary Language` indicator,
# largest group first.
qol |>
  count(`Primary Language`, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 3 × 3
  `Primary Language`     n    pct
               <dbl> <int>  <dbl>
1                  1  1725 66.1  
2                  0   859 32.9  
3                 NA    25  0.958

Proficiency

# Count and percentage of respondents by self-rated English speaking ability,
# largest group first.
qol |>
  count(`English Speaking`, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 5 × 3
  `English Speaking`     n    pct
  <fct>              <int>  <dbl>
1 Very well            974 37.3  
2 Well                 808 31.0  
3 Not well             632 24.2  
4 Not at all           177  6.78 
5 <NA>                  18  0.690
# Count and percentage of respondents by reported difficulty with English,
# largest group first.
qol |>
  count(`English Difficulties`, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 5 × 3
  `English Difficulties`     n   pct
  <fct>                  <int> <dbl>
1 Not at all               772 29.6 
2 Not much                 733 28.1 
3 Much                     549 21.0 
4 Very much                516 19.8 
5 <NA>                      39  1.49

Familiarity with America and Ethnic Origin

# Count and percentage of respondents by familiarity with America,
# largest group first.
qol |>
  count(`Familiarity with America`, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 5 × 3
  `Familiarity with America`     n   pct
  <fct>                      <int> <dbl>
1 High                        1238 47.5 
2 Low                          721 27.6 
3 Very high                    498 19.1 
4 Very low                     123  4.71
5 <NA>                          29  1.11
# Count and percentage of respondents by familiarity with their ethnic origin,
# largest group first.
qol |>
  count(`Familiarity with Ethnic Origin`, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 5 × 3
  `Familiarity with Ethnic Origin`     n   pct
  <fct>                            <int> <dbl>
1 High                              1369 52.5 
2 Very high                          864 33.1 
3 Low                                295 11.3 
4 Very low                            51  1.95
5 <NA>                                30  1.15

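Comparing familiarity with ethnic origin against familiarity with America (percentages are computed within each level of Familiarity with Ethnic Origin):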

# Cross-tabulate the two familiarity ratings.
# `.groups = "drop_last"` keeps the result grouped by
# `Familiarity with Ethnic Origin` only, so `pct` is the share of each
# `Familiarity with America` level WITHIN an ethnic-origin familiarity level
# (the percentages sum to 100 per ethnic-origin level, not overall).
# Stating it explicitly also silences the summarise() regrouping message.
qol |>
  group_by(`Familiarity with Ethnic Origin`, `Familiarity with America`) |>
  summarize(n = n(), .groups = "drop_last") |>
  mutate(pct = n / sum(n) * 100) |>
  arrange(desc(n))
# A tibble: 23 × 4
# Groups:   Familiarity with Ethnic Origin [5]
   `Familiarity with Ethnic Origin` `Familiarity with America`     n   pct
   <fct>                            <fct>                      <int> <dbl>
 1 High                             High                         711 51.9 
 2 High                             Low                          431 31.5 
 3 Very high                        High                         422 48.8 
 4 Very high                        Very high                    250 28.9 
 5 Very high                        Low                          167 19.3 
 6 High                             Very high                    165 12.1 
 7 Low                              Low                          109 36.9 
 8 Low                              High                          91 30.8 
 9 Low                              Very high                     67 22.7 
10 High                             Very low                      51  3.73
# ℹ 13 more rows

Identifying with the ethnic community

# Count and percentage of respondents by how closely they identify with
# their ethnic community, largest group first.
qol |>
  count(`Identify Ethnically`, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 5 × 3
  `Identify Ethnically`     n   pct
  <fct>                 <int> <dbl>
1 Somewhat close         1211 46.4 
2 Very close             1039 39.8 
3 Not very close          293 11.2 
4 <NA>                     35  1.34
5 Not at all               31  1.19

Belonging to the ethnic community

# Count and percentage of respondents by sense of belonging,
# largest group first.
qol |>
  count(Belonging, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 5 × 3
  Belonging         n   pct
  <fct>         <int> <dbl>
1 Somewhat       1255 48.1 
2 Very much       766 29.4 
3 Not very much   450 17.2 
4 Not at all       87  3.33
5 <NA>             51  1.95

Perceived Discrimination

# Count and percentage of respondents by the `Discrimination` indicator,
# largest group first.
qol |>
  count(Discrimination, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 3 × 3
  Discrimination     n   pct
           <dbl> <int> <dbl>
1              0  1598  61.2
2              1   694  26.6
3             NA   317  12.2

Income

# Count and percentage of respondents by income bracket, largest group first.
qol |>
  count(Income, sort = TRUE) |>
  mutate(pct = n / sum(n) * 100)
# A tibble: 9 × 3
  Income                n   pct
  <fct>             <int> <dbl>
1 $70,000 and over    993 38.1 
2 $0 - $9,999         254  9.74
3 $30,000 - $39,999   207  7.93
4 $10,000 - $19,999   205  7.86
5 <NA>                203  7.78
6 $20,000 - $29,999   198  7.59
7 $60,000 - $69,999   190  7.28
8 $40,000 - $49,999   181  6.94
9 $50,000 - $59,999   178  6.82

Analysis

Income

Source of Information: Family association with income after controlling for ethnicity.

# Analysis data for the Family model: keep complete cases on the three
# variables, restrict Family to its "Yes"/"No" answers, and drop the unused
# factor levels so the response is strictly binary.
qol_1 <- qol |>
  select(Family, Income, Ethnicity) |>
  drop_na() |>
  filter(Family %in% c("Yes", "No")) |>
  mutate(Family = droplevels(Family))

# Logistic regression of Family on income bracket, adjusting for ethnicity.
# With a factor response, glm() treats the first level as "failure"; after
# droplevels() the levels are "No" < "Yes", so coefficients are log-odds of
# answering "Yes". Reference categories are the first factor levels
# ("$0 - $9,999" for Income, "Asian Indian" for Ethnicity).
mod1 <- glm(Family ~ Income + Ethnicity, data = qol_1, family = "binomial")
summary(mod1)

Call:
glm(formula = Family ~ Income + Ethnicity, family = "binomial", 
    data = qol_1)

Coefficients:
                        Estimate Std. Error z value Pr(>|z|)    
(Intercept)               0.5431     0.1511   3.594 0.000325 ***
Income$10,000 - $19,999  -0.3912     0.1949  -2.007 0.044766 *  
Income$20,000 - $29,999  -0.4313     0.1966  -2.194 0.028210 *  
Income$30,000 - $39,999  -0.5247     0.1947  -2.694 0.007051 ** 
Income$40,000 - $49,999  -0.6100     0.2016  -3.026 0.002480 ** 
Income$50,000 - $59,999  -0.4562     0.2016  -2.263 0.023616 *  
Income$60,000 - $69,999  -0.6366     0.1976  -3.222 0.001274 ** 
Income$70,000 and over   -0.5787     0.1470  -3.936  8.3e-05 ***
EthnicityChinese          0.3843     0.1246   3.085 0.002036 ** 
EthnicityFilipino         0.1972     0.1577   1.250 0.211119    
EthnicityKorean          -0.2149     0.1316  -1.633 0.102571    
EthnicityOther           -0.4271     0.2000  -2.135 0.032721 *  
EthnicityVietnamese      -0.3265     0.1327  -2.461 0.013854 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 3317.8  on 2393  degrees of freedom
Residual deviance: 3252.8  on 2381  degrees of freedom
AIC: 3278.8

Number of Fisher Scoring iterations: 4
# Type II likelihood-ratio tests for each term of the Family model
# (these are the car::Anova defaults for a glm, spelled out).
car::Anova(mod1, type = "II", test.statistic = "LR")
Analysis of Deviance Table (Type II tests)

Response: Family
          LR Chisq Df Pr(>Chisq)    
Income       18.48  7   0.009983 ** 
Ethnicity    44.93  5  1.499e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Source of Information: Close Friends association with income after controlling for ethnicity.

# Cell counts for every observed Close Friends x Income x Ethnicity
# combination among complete cases. `.groups = "drop"` returns an ungrouped
# tibble and avoids the summarise() regrouping message.
qol_1 <- qol |>
  select(`Close Friends`, Income, Ethnicity) |>
  drop_na() |>
  group_by(`Close Friends`, Income, Ethnicity) |>
  summarize(n = n(), .groups = "drop")

# Three-way contingency table; combinations that never occur are filled
# with zero counts.
corr_table <- xtabs(n ~ `Close Friends` + Income + Ethnicity, data = qol_1)
# corr_table

# Cochran-Mantel-Haenszel test of association between Close Friends and
# Income; mantelhaen.test() uses the last table dimension (Ethnicity) as the
# stratifying variable.
mantelhaen.test(corr_table)

    Cochran-Mantel-Haenszel test

data:  corr_table
Cochran-Mantel-Haenszel M^2 = 59.769, df = 35, p-value = 0.005648